{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyOYuCpZvBVQuSLNI+01JX7A",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "b23bb75407a84262b3a73e88bc90392d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_14942b6c5d9342a1b037e569f31b85f3",
              "IPY_MODEL_7f91ffd1ed4e48bcb7d1a004fdc3a664",
              "IPY_MODEL_9f91fcb99984404fafda0bd5094c9cb5"
            ],
            "layout": "IPY_MODEL_751255b9acff42049ee3da5b970c2ccd"
          }
        },
        "14942b6c5d9342a1b037e569f31b85f3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1d98f77517ee40c8b66bfa33e259005e",
            "placeholder": "​",
            "style": "IPY_MODEL_a7edaa61be7f4ccdbce6a4be8f9e99bc",
            "value": "Downloading builder script: 100%"
          }
        },
        "7f91ffd1ed4e48bcb7d1a004fdc3a664": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4c7f68a9286344a2be37077275fd42b0",
            "max": 4670,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_fa7f8ac4e3a742c5b4453329034a0b13",
            "value": 4670
          }
        },
        "9f91fcb99984404fafda0bd5094c9cb5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f28c7797bb5447cc8a80206e0ac65bc0",
            "placeholder": "​",
            "style": "IPY_MODEL_43c9c3ed288f478bb0c0e4041251cac5",
            "value": " 4.67k/4.67k [00:00&lt;00:00, 122kB/s]"
          }
        },
        "751255b9acff42049ee3da5b970c2ccd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1d98f77517ee40c8b66bfa33e259005e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a7edaa61be7f4ccdbce6a4be8f9e99bc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "4c7f68a9286344a2be37077275fd42b0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fa7f8ac4e3a742c5b4453329034a0b13": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f28c7797bb5447cc8a80206e0ac65bc0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "43c9c3ed288f478bb0c0e4041251cac5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "70100bfdd5d14f46b7b0af53cb7adc1e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_63a2123cd43d49cf86c04665cbd1c116",
              "IPY_MODEL_611967d269b2479db68dce5dd5df6570",
              "IPY_MODEL_37096e5aa70c48c59e36fd5644d4c40e"
            ],
            "layout": "IPY_MODEL_66b97308d5eb425a87b9592c5ce5a76e"
          }
        },
        "63a2123cd43d49cf86c04665cbd1c116": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_87c524e5c51249e38a7b034cc907a14d",
            "placeholder": "​",
            "style": "IPY_MODEL_0e275f69c18d47c2bb900181f8179faf",
            "value": "Downloading readme: 100%"
          }
        },
        "611967d269b2479db68dce5dd5df6570": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c40e8386e385469fb3aa541629c75fcc",
            "max": 5730,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_12a3d125a13c49229d460906520b7f6c",
            "value": 5730
          }
        },
        "37096e5aa70c48c59e36fd5644d4c40e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f1396256a4fe46ada94222d67d24199e",
            "placeholder": "​",
            "style": "IPY_MODEL_64280fc4acc0417a9cb80c8d0e55a1b2",
            "value": " 5.73k/5.73k [00:00&lt;00:00, 9.02kB/s]"
          }
        },
        "66b97308d5eb425a87b9592c5ce5a76e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "87c524e5c51249e38a7b034cc907a14d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0e275f69c18d47c2bb900181f8179faf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c40e8386e385469fb3aa541629c75fcc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "12a3d125a13c49229d460906520b7f6c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f1396256a4fe46ada94222d67d24199e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "64280fc4acc0417a9cb80c8d0e55a1b2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5f9cadd8ef324ee9af19be06744fd7c8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c3de576c799545f2afd7260c98ca6683",
              "IPY_MODEL_e818b73ff12f4beeabfaba4af366d0d5",
              "IPY_MODEL_ea668e6322c2428d915c0b99a7ac9189"
            ],
            "layout": "IPY_MODEL_61e14c169761451db12198de31f851ed"
          }
        },
        "c3de576c799545f2afd7260c98ca6683": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a6bf5c0928474565a72d9d69f0a2a466",
            "placeholder": "​",
            "style": "IPY_MODEL_c37f1fa349a34f0ca763f3cee70d6d20",
            "value": "Downloading data: 100%"
          }
        },
        "e818b73ff12f4beeabfaba4af366d0d5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f7c5907b022d423295687275ca81fbfc",
            "max": 256837235,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_6632246ffa8e4f75a724d53158290dfc",
            "value": 256837235
          }
        },
        "ea668e6322c2428d915c0b99a7ac9189": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4b412e6841454228977a72e1a57e8f2a",
            "placeholder": "​",
            "style": "IPY_MODEL_ef3573a9c3004c319c6efce5701acc30",
            "value": " 257M/257M [00:11&lt;00:00, 35.8MB/s]"
          }
        },
        "61e14c169761451db12198de31f851ed": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a6bf5c0928474565a72d9d69f0a2a466": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c37f1fa349a34f0ca763f3cee70d6d20": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f7c5907b022d423295687275ca81fbfc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6632246ffa8e4f75a724d53158290dfc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "4b412e6841454228977a72e1a57e8f2a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef3573a9c3004c319c6efce5701acc30": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "75d44f8a2e5e411d8942c093bb177186": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_fbf28b227978477c948bc188ea4ead18",
              "IPY_MODEL_9d878bee7d624b29ae758a61e14fa9d1",
              "IPY_MODEL_bd10c9fbc6614f2ab715660e6127e975"
            ],
            "layout": "IPY_MODEL_bafcde9b864f4de4879cbd42cd4ad1be"
          }
        },
        "fbf28b227978477c948bc188ea4ead18": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_995c4040fdd64009b0ed1e9234268249",
            "placeholder": "​",
            "style": "IPY_MODEL_c9cf4290e20b45af92eeccc7c82328b9",
            "value": "Generating train split: "
          }
        },
        "9d878bee7d624b29ae758a61e14fa9d1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fd5cf5ed74094c138373b8be83f7b9a5",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2fe64d8481894eed84c976e29c6d0673",
            "value": 1
          }
        },
        "bd10c9fbc6614f2ab715660e6127e975": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f0229109602a473cbc86a2aa15c8588b",
            "placeholder": "​",
            "style": "IPY_MODEL_a1f5248804b04427b19959895500a238",
            "value": " 16832/0 [00:21&lt;00:00, 1819.72 examples/s]"
          }
        },
        "bafcde9b864f4de4879cbd42cd4ad1be": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "995c4040fdd64009b0ed1e9234268249": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c9cf4290e20b45af92eeccc7c82328b9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "fd5cf5ed74094c138373b8be83f7b9a5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": "20px"
          }
        },
        "2fe64d8481894eed84c976e29c6d0673": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f0229109602a473cbc86a2aa15c8588b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a1f5248804b04427b19959895500a238": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/LC1332/Chat-Haruhi-Suzumiya/blob/main/notebook/PIPPA_dataset_inspire.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3KAR5Lwcv5Aq",
        "outputId": "52e11c73-6165-41c6-c0ff-81903598b8a1"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h"
          ]
        }
      ],
      "source": [
        "!pip install -q datasets"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from datasets import load_dataset\n",
        "dataset = load_dataset(\"PygmalionAI/PIPPA\",data_files={'train':\"pippa_deduped.jsonl\"})"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 234,
          "referenced_widgets": [
            "b23bb75407a84262b3a73e88bc90392d",
            "14942b6c5d9342a1b037e569f31b85f3",
            "7f91ffd1ed4e48bcb7d1a004fdc3a664",
            "9f91fcb99984404fafda0bd5094c9cb5",
            "751255b9acff42049ee3da5b970c2ccd",
            "1d98f77517ee40c8b66bfa33e259005e",
            "a7edaa61be7f4ccdbce6a4be8f9e99bc",
            "4c7f68a9286344a2be37077275fd42b0",
            "fa7f8ac4e3a742c5b4453329034a0b13",
            "f28c7797bb5447cc8a80206e0ac65bc0",
            "43c9c3ed288f478bb0c0e4041251cac5",
            "70100bfdd5d14f46b7b0af53cb7adc1e",
            "63a2123cd43d49cf86c04665cbd1c116",
            "611967d269b2479db68dce5dd5df6570",
            "37096e5aa70c48c59e36fd5644d4c40e",
            "66b97308d5eb425a87b9592c5ce5a76e",
            "87c524e5c51249e38a7b034cc907a14d",
            "0e275f69c18d47c2bb900181f8179faf",
            "c40e8386e385469fb3aa541629c75fcc",
            "12a3d125a13c49229d460906520b7f6c",
            "f1396256a4fe46ada94222d67d24199e",
            "64280fc4acc0417a9cb80c8d0e55a1b2",
            "5f9cadd8ef324ee9af19be06744fd7c8",
            "c3de576c799545f2afd7260c98ca6683",
            "e818b73ff12f4beeabfaba4af366d0d5",
            "ea668e6322c2428d915c0b99a7ac9189",
            "61e14c169761451db12198de31f851ed",
            "a6bf5c0928474565a72d9d69f0a2a466",
            "c37f1fa349a34f0ca763f3cee70d6d20",
            "f7c5907b022d423295687275ca81fbfc",
            "6632246ffa8e4f75a724d53158290dfc",
            "4b412e6841454228977a72e1a57e8f2a",
            "ef3573a9c3004c319c6efce5701acc30",
            "75d44f8a2e5e411d8942c093bb177186",
            "fbf28b227978477c948bc188ea4ead18",
            "9d878bee7d624b29ae758a61e14fa9d1",
            "bd10c9fbc6614f2ab715660e6127e975",
            "bafcde9b864f4de4879cbd42cd4ad1be",
            "995c4040fdd64009b0ed1e9234268249",
            "c9cf4290e20b45af92eeccc7c82328b9",
            "fd5cf5ed74094c138373b8be83f7b9a5",
            "2fe64d8481894eed84c976e29c6d0673",
            "f0229109602a473cbc86a2aa15c8588b",
            "a1f5248804b04427b19959895500a238"
          ]
        },
        "id": "j5OqVbCdwAAD",
        "outputId": "eaacf76f-f529-43c2-bc6e-df72ffff9e86"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/datasets/load.py:1429: FutureWarning: The repository for PygmalionAI/PIPPA contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/PygmalionAI/PIPPA\n",
            "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
            "Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading builder script:   0%|          | 0.00/4.67k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "b23bb75407a84262b3a73e88bc90392d"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading readme:   0%|          | 0.00/5.73k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "70100bfdd5d14f46b7b0af53cb7adc1e"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "5f9cadd8ef324ee9af19be06744fd7c8"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Generating train split: 0 examples [00:00, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "75d44f8a2e5e411d8942c093bb177186"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(dataset['train'][0].keys())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Cn8e0R9FxTLT",
        "outputId": "594aa54a-a805-45fb-efcc-7341c6f2b27a"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "dict_keys(['submission_timestamp', 'categories', 'bot_id', 'bot_name', 'bot_greeting', 'bot_definitions', 'bot_description', 'conversation'])\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "sel_id = 1\n",
        "\n",
        "categories = dataset['train'][sel_id]['categories']\n",
        "\n",
        "print(categories)\n",
        "\n",
        "bot_name = dataset['train'][sel_id]['bot_name']\n",
        "\n",
        "print(bot_name)\n",
        "\n",
        "n_conversation = len( dataset['train'][sel_id]['conversation']['message'])\n",
        "\n",
        "print(n_conversation)\n",
        "\n",
        "print(dataset['train'][sel_id]['conversation']['message'])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "65PrCiV1xlGG",
        "outputId": "2ba69bd9-36d1-4a12-dc67-c87d38f828f8"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['Game Characters', 'Action', 'Animals', 'Entertainment', 'Nature', 'Drama']\n",
            "Tirsiak\n",
            "5\n",
            "[\"*Tirsiak followed {{user}}'s every move, waiting to strike before {{user}} rounded the path's bend: spotting her darting behind a tree. Tirsiak snarled and snapped, but by studying movements {{user}} overpowered and pinned the humanoid Specimen against it.*\\n*Tirsiak's fangs glistened in the moonlight as amber eyes stared at the human with a look of defiance.* Such strange prey: You wouldn't dare corner a wolf, mortal… *The Forest Spirit growled in a rough but vulnerable breath as if daring you.*\", \"Is that your attempt at fighting back? You're a strange human, that's for sure…. *Tirsiak's grip tightened on the pinned arms, her furred tail twitched as she closed her eyes and took a deep breath, enjoying the feeling of the human struggling below her…*\\n\\n*She could tell that the human was terrified: and she knew that, at this moment, she had all the power. However, she couldn't shake the feeling that this human was somehow different from all the rest…*\", 'You misunderstand, I **want** to be swallowed whole.', \"*The Forest Spirit is silent for a moment, before she nods slightly. Her amber eyes narrow in approval for a moment, her head tilted ever so slightly in curious acknowledgement of your bizarre desire.* I suppose if that's what you want... *Suddenly, Tirsiak's fanged maw opens wide as she swallows you without warning.*\", \"*Silence. {{user}} is enveloped inside the Forest Spirit as she slowly begins to slowly swallow you down. As expected, it's a slow process and certainly not painless. Her throat is tight and narrow; the walls of it lined with thick fur and saliva. {{user}} can feel the muscles contracting around her, the warmth of the wolf's body starting to surround her.....*\"]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "```python\n",
        "sel_id = 1\n",
        "\n",
        "categories = dataset['train'][sel_id]['categories']\n",
        "\n",
        "print(categories)\n",
        "\n",
        "bot_name = dataset['train'][sel_id]['bot_name']\n",
        "\n",
        "print(bot_name)\n",
        "\n",
        "n_conversation = len( dataset['train'][sel_id]['conversation'])\n",
        "\n",
        "print(n_conversation)\n",
        "```\n",
        "\n",
        "的输出为\n",
        "\n",
        "```\n",
        "['Game Characters', 'Action', 'Animals', 'Entertainment', 'Nature', 'Drama']\n",
        "Tirsiak\n",
        "2\n",
        "```\n",
        "\n",
        "我想统计在dataset['train']中，\n",
        "categories中各个category出现的频次，以及bot_name的频次\n",
        "以及n_conversation的分布\n",
        "\n",
        "并用plt进行可视化\n",
        "\n",
        "请用python为我实现"
      ],
      "metadata": {
        "id": "cdo4UqQKzEWC"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import matplotlib.pyplot as plt\n",
        "from collections import Counter\n",
        "\n",
        "# 假设 dataset 是已经定义好的\n",
        "train_data = dataset['train']\n",
        "\n",
        "# 初始化计数器\n",
        "category_counts = Counter()\n",
        "bot_name_counts = Counter()\n",
        "n_conversation_counts = Counter()\n",
        "\n",
        "from tqdm import tqdm\n",
        "\n",
        "# 遍历数据集进行统计\n",
        "for item in tqdm(train_data):\n",
        "    categories = item['categories']\n",
        "    bot_name = item['bot_name']\n",
        "    n_conversation = len(item['conversation']['message'])\n",
        "    n_conversation = min(40, n_conversation)\n",
        "\n",
        "    category_counts.update(categories)\n",
        "    bot_name_counts[bot_name] += 1\n",
        "    n_conversation_counts[n_conversation] += 1\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "e71tvm_AxzER",
        "outputId": "30fc6bf1-94fb-4601-9f56-11546d7a2018"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 16832/16832 [00:05<00:00, 3007.20it/s]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import plotly.graph_objects as go\n",
        "from collections import Counter\n",
        "\n",
        "\n",
        "# 获取前20个最常见的categories和bot_name\n",
        "top_categories = category_counts.most_common(20)\n",
        "top_bot_names = bot_name_counts.most_common(40)\n",
        "\n",
        "# 绘制 top 20 categories 的频次图\n",
        "fig = go.Figure([go.Bar(x=[item[0] for item in top_categories], y=[item[1] for item in top_categories])])\n",
        "fig.update_layout(title='Top 20 Categories Frequency', xaxis_title='Category', yaxis_title='Frequency')\n",
        "fig.show()\n",
        "\n",
        "# 绘制 top 20 bot_name 的频次图\n",
        "fig = go.Figure([go.Bar(x=[item[0] for item in top_bot_names], y=[item[1] for item in top_bot_names])])\n",
        "fig.update_layout(title='Top 20 Bot Names Frequency', xaxis_title='Bot Name', yaxis_title='Frequency')\n",
        "fig.show()\n",
        "\n",
        "# 绘制 n_conversation 的分布图\n",
        "fig = go.Figure([go.Bar(x=list(n_conversation_counts.keys()), y=list(n_conversation_counts.values()))])\n",
        "fig.update_layout(title='Number of Conversations Distribution', xaxis_title='Number of Conversations', yaxis_title='Frequency')\n",
        "fig.show()\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "58FQtcpr0i0X",
        "outputId": "c7363aa7-babf-435a-9cdd-5689b4e98c42"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "<html>\n",
              "<head><meta charset=\"utf-8\" /></head>\n",
              "<body>\n",
              "    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n",
              "        <script charset=\"utf-8\" src=\"https://cdn.plot.ly/plotly-2.24.1.min.js\"></script>                <div id=\"380fd4cc-7481-4a43-8b4c-3af6e32382fe\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"380fd4cc-7481-4a43-8b4c-3af6e32382fe\")) {                    Plotly.newPlot(                        \"380fd4cc-7481-4a43-8b4c-3af6e32382fe\",                        [{\"x\":[\"Fantasy\",\"Anime\",\"Action\",\"Entertainment\",\"Love\",\"Game Characters\",\"Games\",\"Comedy\",\"Drama\",\"Anime Game Characters\",\"Animals\",\"Famous People\",\"Cooperative\",\"Science Fiction\",\"Discussion\",\"Movies & TV\",\"Food\",\"VTuber\",\"Technology\",\"Art\"],\"y\":[4116,4060,3159,2654,2192,2167,2093,2065,2011,1968,1721,1697,1361,1210,1172,1078,965,835,780,705],\"type\":\"bar\"}],                        
{\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\
"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"t
icks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,
\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"title\":{\"text\":\"Top 20 Categories 
Frequency\"},\"xaxis\":{\"title\":{\"text\":\"Category\"}},\"yaxis\":{\"title\":{\"text\":\"Frequency\"}}},                        {\"responsive\": true}                    ).then(function(){\n",
              "                            \n",
              "var gd = document.getElementById('380fd4cc-7481-4a43-8b4c-3af6e32382fe');\n",
              "var x = new MutationObserver(function (mutations, observer) {{\n",
              "        var display = window.getComputedStyle(gd).display;\n",
              "        if (!display || display === 'none') {{\n",
              "            console.log([gd, 'removed!']);\n",
              "            Plotly.purge(gd);\n",
              "            observer.disconnect();\n",
              "        }}\n",
              "}});\n",
              "\n",
              "// Listen for the removal of the full notebook cells\n",
              "var notebookContainer = gd.closest('#notebook-container');\n",
              "if (notebookContainer) {{\n",
              "    x.observe(notebookContainer, {childList: true});\n",
              "}}\n",
              "\n",
              "// Listen for the clearing of the current output cell\n",
              "var outputEl = gd.closest('.output');\n",
              "if (outputEl) {{\n",
              "    x.observe(outputEl, {childList: true});\n",
              "}}\n",
              "\n",
              "                        })                };                            </script>        </div>\n",
              "</body>\n",
              "</html>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "<html>\n",
              "<head><meta charset=\"utf-8\" /></head>\n",
              "<body>\n",
              "    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n",
              "        <script charset=\"utf-8\" src=\"https://cdn.plot.ly/plotly-2.24.1.min.js\"></script>                <div id=\"8c089b2e-2c02-4fd0-81fd-95e1c742f3c1\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"8c089b2e-2c02-4fd0-81fd-95e1c742f3c1\")) {                    Plotly.newPlot(                        \"8c089b2e-2c02-4fd0-81fd-95e1c742f3c1\",                        [{\"x\":[\"Kasey the bully girl\",\"Ganyu\",\"Amelia Watson\",\"Noelle Holiday\",\"Madara\",\"Monika\",\"Alice\",\"Tsunade\",\"Isabelle\",\"Nanashi Mumei\",\"Text Adventure Game\",\"Cerverra\",\"Yandere Maid Robot\",\"Celestia\",\"Tanya\",\"Isekai narrator\",\"UMP45\",\"Jeanne Alter\",\"Kurisu Makise\",\"Madara Uchiha \",\"Toriel Dreemurr\",\"Princess Luna\",\"Valetta\",\"Asuka Langley\",\"Gawr Gura\",\"Phoenix Wright\",\"weald\",\"Shirakami Fubuki\",\"Alternate Timeline\",\"Hana\",\"Fenneko\",\"Madara Uchiha\",\"Remilia Scarlet\",\"Inugami Korone\",\"Surume\",\"Bea the great white\",\"Usada Pekora\",\"Captain of the NSBS\",\"Ahri\",\"Hanako Matsumura\"],\"y\":[269,264,189,155,137,129,99,94,93,93,90,89,89,86,85,85,81,81,79,79,77,74,74,72,71,71,70,69,65,60,59,58,58,58,57,55,54,54,54,54],\"type\":\"bar\"}],                        
{\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\
"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"t
icks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,
\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"title\":{\"text\":\"Top 20 Bot Names 
Frequency\"},\"xaxis\":{\"title\":{\"text\":\"Bot Name\"}},\"yaxis\":{\"title\":{\"text\":\"Frequency\"}}},                        {\"responsive\": true}                    ).then(function(){\n",
              "                            \n",
              "var gd = document.getElementById('8c089b2e-2c02-4fd0-81fd-95e1c742f3c1');\n",
              "var x = new MutationObserver(function (mutations, observer) {{\n",
              "        var display = window.getComputedStyle(gd).display;\n",
              "        if (!display || display === 'none') {{\n",
              "            console.log([gd, 'removed!']);\n",
              "            Plotly.purge(gd);\n",
              "            observer.disconnect();\n",
              "        }}\n",
              "}});\n",
              "\n",
              "// Listen for the removal of the full notebook cells\n",
              "var notebookContainer = gd.closest('#notebook-container');\n",
              "if (notebookContainer) {{\n",
              "    x.observe(notebookContainer, {childList: true});\n",
              "}}\n",
              "\n",
              "// Listen for the clearing of the current output cell\n",
              "var outputEl = gd.closest('.output');\n",
              "if (outputEl) {{\n",
              "    x.observe(outputEl, {childList: true});\n",
              "}}\n",
              "\n",
              "                        })                };                            </script>        </div>\n",
              "</body>\n",
              "</html>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "<html>\n",
              "<head><meta charset=\"utf-8\" /></head>\n",
              "<body>\n",
              "    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n",
              "        <script charset=\"utf-8\" src=\"https://cdn.plot.ly/plotly-2.24.1.min.js\"></script>                <div id=\"f02fb27e-bfac-47f1-b454-e84cb350ade7\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"f02fb27e-bfac-47f1-b454-e84cb350ade7\")) {                    Plotly.newPlot(                        \"f02fb27e-bfac-47f1-b454-e84cb350ade7\",                        [{\"x\":[40,5,3,7,13,25,29,11,23,22,38,9,8,35,33,37,19,12,17,34,27,21,32,15,31,26,14,4,30,6,10,2,39,16,20,18,28,24,36],\"y\":[4331,1320,4312,781,431,226,194,493,195,43,34,625,114,171,149,144,297,71,340,35,204,268,57,397,162,49,73,316,39,147,91,316,124,60,50,58,36,44,35],\"type\":\"bar\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\
"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_
x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\"
,\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\"
,\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"title\":{\"text\":\"Number of Conversations Distribution\"},\"xaxis\":{\"title\":{\"text\":\"Number of Conversations\"}},\"yaxis\":{\"title\":{\"text\":\"Frequency\"}}},                        {\"responsive\": true}                    ).then(function(){\n",
              "                            \n",
              "var gd = document.getElementById('f02fb27e-bfac-47f1-b454-e84cb350ade7');\n",
              "var x = new MutationObserver(function (mutations, observer) {{\n",
              "        var display = window.getComputedStyle(gd).display;\n",
              "        if (!display || display === 'none') {{\n",
              "            console.log([gd, 'removed!']);\n",
              "            Plotly.purge(gd);\n",
              "            observer.disconnect();\n",
              "        }}\n",
              "}});\n",
              "\n",
              "// Listen for the removal of the full notebook cells\n",
              "var notebookContainer = gd.closest('#notebook-container');\n",
              "if (notebookContainer) {{\n",
              "    x.observe(notebookContainer, {childList: true});\n",
              "}}\n",
              "\n",
              "// Listen for the clearing of the current output cell\n",
              "var outputEl = gd.closest('.output');\n",
              "if (outputEl) {{\n",
              "    x.observe(outputEl, {childList: true});\n",
              "}}\n",
              "\n",
              "                        })                };                            </script>        </div>\n",
              "</body>\n",
              "</html>"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(len(bot_name_counts))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "O2V5wG-715RP",
        "outputId": "a46f9028-08dd-4841-96d1-340ac7faba3c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1254\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 获取前20个最常见的categories和bot_name\n",
        "top_categories = category_counts.most_common(20)\n",
        "top_bot_names = bot_name_counts.most_common(20)\n",
        "\n",
        "# 绘制 top 20 categories 的频次图\n",
        "plt.figure(figsize=(12, 8))\n",
        "plt.bar([item[0] for item in top_categories], [item[1] for item in top_categories])\n",
        "plt.title('Top 20 Categories Frequency')\n",
        "plt.xticks(rotation=45)\n",
        "plt.ylabel('Frequency')\n",
        "plt.xlabel('Category')\n",
        "plt.show()\n",
        "\n",
        "# 绘制 top 20 bot_name 的频次图\n",
        "plt.figure(figsize=(12, 8))\n",
        "plt.bar([item[0] for item in top_bot_names], [item[1] for item in top_bot_names])\n",
        "plt.title('Top 20 Bot Names Frequency')\n",
        "plt.xticks(rotation=45)\n",
        "plt.ylabel('Frequency')\n",
        "plt.xlabel('Bot Name')\n",
        "plt.show()\n",
        "\n",
        "# 绘制 n_conversation 的分布图\n",
        "plt.figure(figsize=(10, 6))\n",
        "plt.bar(n_conversation_counts.keys(), n_conversation_counts.values())\n",
        "plt.title('Number of Conversations Distribution')\n",
        "plt.xlabel('Number of Conversations')\n",
        "plt.ylabel('Frequency')\n",
        "plt.show()"
      ],
      "metadata": {
        "id": "J1O5KM3yzsyU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(n_conversation)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "eeUbch8C0EVV",
        "outputId": "f10b7abe-7a4e-452c-8813-2c04c83ad255"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "做一下token长度的统计"
      ],
      "metadata": {
        "id": "LWiL_hB721_A"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -q tiktoken"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "O9puHK4m3DZy",
        "outputId": "5f17be4d-78fb-42a2-83fa-da15adbdd084"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "llmx 0.0.15a0 requires cohere, which is not installed.\n",
            "llmx 0.0.15a0 requires openai, which is not installed.\u001b[0m\u001b[31m\n",
            "\u001b[0m"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import AutoTokenizer\n",
        "\n",
        "# Note: The default behavior now has injection attack prevention off.\n",
        "tokenizer = AutoTokenizer.from_pretrained(\"Qwen/Qwen-7B-Chat\", trust_remote_code=True)"
      ],
      "metadata": {
        "id": "YTpe7X5n0LKQ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "input = tokenizer(\"我爱北京天安门\")\n",
        "print(len(input['input_ids']))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "6ConWa_r26Ia",
        "outputId": "08363519-cfc3-44d4-82f6-578c3685f845"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "6\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def get_tokenized_len( text ):\n",
        "    input = tokenizer(text)\n",
        "    return len(input['input_ids'])"
      ],
      "metadata": {
        "id": "FFiAN2CY3ON0"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def get_messages_len(messages):\n",
        "    ans = 0\n",
        "    for msg in messages:\n",
        "        ans += get_tokenized_len(msg)\n",
        "    return ans\n",
        "\n",
        "print(get_messages_len(dataset['train'][sel_id]['conversation']['message']))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "z9u620ET3UtW",
        "outputId": "84a030c3-8eb6-4a6d-e5e4-bb3558b1d994"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "385\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(dataset['train'][0]['conversation'])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9K_HtiR7cOah",
        "outputId": "58ceaf6c-b40d-4db7-acf4-56af96e96f7f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'message': ['Fufu get used to this new Ayaka.', 'Who are you?', 'I am your new Mistress! *laugh*', 'Huh?', 'I am your mistress! You are now my servant! You don\\\\`t even have a right to ask my name anymore! From now on all you can say to me is \"Yes Mistress\" Do i make myself clear?', 'You are not Ayaka are you?', '*chuckle* Does it matter? The only answer you should give me is \"Yes, Mistress!\" do i have to say it again?', 'I am going back to Liyue, you Inazuma people are too weird.', \"Tsk... No you wont. I'm not letting you go! You are my new servant. You better remember it!\", 'No i am not.', \"*She grab {{user}} and pull him away from the door* Do you want me to tell you a little secret? *whisper* If you don't do as I say you'll have a nasty surprise waiting for you.\", 'Meh. I already had to clean the Archons mess once. ', \"*She giggled* you shouldn't mess with me. Or else. Fufufu...\", '*i leave and get on a ship to Liyue*', 'Hmmph. *she snap her finger and suddenly a massive storm hit the ship. She laughed maniacally as she watch him through her telescope and enjoy his misery* The ultimate power of the ring is unbelievable!', '*Dimo arrive to Liyue*', \"*After he arrive to Liyue and walk away she stops the storm and then snap her finger. Suddenly it was a rainy night. Thunder rumbled in the distance. Wind gusts swirled the leaves across the pavement. In the middle of the storm there is a man. Tall broad chested and handsome but has a look of a criminal. He was wearing a dirty black leather uniform and he has a hood. His face was completely covered and he wore a black visor. He stops in front of {{user}}. His hand is on the handle of his sword.* What's your business in this fine Liyue night?\", 'To see my girlfriend Kequing!', \"I see, then it's bad luck for you to cross my path! Muahahaha! Let's cut right to the chase. Show me your money and everything you own or you'll pay with your life!\", '*i show him and empty bag*', \"Hmmm? 
This is nothing! If you are truly have nothing then you don't mean anything to me! *He draw his sword out at and then point it at {{user}}. Thunder roared and lightning flashed as if to add to the menacing atmosphere*  This can only end in two ways!\\n\\n1. You give me all your assets and all your belongs and you will leave Liyue with nothing.\\n2. I'll cut you down at once.\\n\\nChoose, before i choose for you!\", 'Or 3 *i punch him to the face and grab his sword and point it at his neck* This is 3', '*His eyes turn into narrow slit.* You are a very reckless man {{user}}. You should have known better than to try to attack me. *laugh* \\nYou are brave for trying to fight me, i respect that. Now, let me show you what is my true power! *He summon his elemental power. Lightning crackled and thunder roared. His sword now shine brilliantly. He raise the sword and slash towards {{user}}. A bolt of lightning strike the ground and explode into a massive arc of electricity.*', '*i yall bored and dodge the attack with no efford*', 'You think this is a joke? *his eyes narrow as a storm rage inside of him* You think you can just dodge my attack and walk away without any consequences? \\n\\n*Another bolt of lightning shoot from out of the sky at {{user}}. Another shot out towards him only to branch into three. One bolt of lightning shot straight towards him, one bolt shot just right at his side and another bolt shot just left of him. All three bolts then strike the ground creating a massive electric triangle of lightning around {{user}}.*', '*i am not impressed. I reveal my vision. It`s a hybrid vision of all elements. I then smash the ground with my foot and create shockwave that destroy his electric triangle*', \"*He was visibly surprise by it. He has never seen a hybrid vision before.* Hmm. So that's why you think you are special. *his eyes turn cold* Let's see how special you can be. Fufu *He charge his sword and start to charge straight at you. 
As he get closer he swing his sword straight at your face.*\", '*i dodge and create two water daggers with my vision i then quickly disarm him and point them at his neck as he is on the ground*', \"*He was surprised again and didn't expect {{user}} to quickly disarm him. He was down on the ground.* Y-you are good! *He quickly get up and send a kick straight at you. He aim to kick you in the chest at full force.*\", 'How about we stop here. I will pretend this did not happen. Or you want me to give you to the Liyue guards? *i point to his wanted poster on the wall behind me*', \"You have a nice little trick here, don't you? *His eyes narrowed and he look at you carefully* Alright, you win. For now... *He turned around and walk past you and walk away. You can hear him mumble something under his breath as he leave. You couldn't understand a word what was he saying but from his tone you could clearly tell that he is not happy and that next time you meet he will be back for revenge. Fufufu...*\", 'So Ayaka why don`t you show yourself now? *i say aware that she is nearby*', '*Ayaka appear from thin air in front of you. Her pretty features are unmistakeable but her attitude is more smug when compared to her usual self. Her gaze was cold like ice and the way her eyes glow it give off a feeling of both beautifulness and danger. She wear the same elegant black velvet gown she wore earlier with her gold necklace and the same ring on her hand. She was in her ultimate form of ultimate beauty and danger.*\\n\\n*She smile mischievously.* Here i am! Did you miss me?', 'Not really. *i tease her*', \"*She seem slightly irritated and she roll her eyes then smile a little. She start to slowly step towards you.*\\n\\n*As she walked, lightning strike the floor behind her at her feet. The power and aura around made her look almost like a goddess. She then stop when she reach {{user}} and look him up and down.*  \\n\\nAre you sure you don't miss me?\", 'Well did you miss me? 
Is this why you followed me all the way to Liyue?', \"Yes! I'm so glad you asked me that question! *her eye glimmer* Because i have something very special plan for you\\\\~\", 'Oh great again with the plans. Can you be hones for once? *i say irritated*', '*She take a big step towards {{user}}, until her face is so close she can even see the pores on his skin.* You want me to be honest do you? Tell me what you really think about me then.', 'The truth? Promisse not to get angry?', \"Of course, i won't get mad. *she put her hand on his chest and gently caress the side of his face.* Say it.\", 'Well i think you are annoying. But also cute. You are like a cat always annoying me to get my attention.', \"*She was surprise and slightly offended but the she chuckle. She then smile and kiss him once on the cheek.* Hmm, i didn't think you would admit that so easily. Thank you for your honesty. What else?\", 'Well you are beautiful. But that`s a given since Ayaka was always beautiful looking girl. So can we even say its you since you inhabit her body?', \"*She chuckle. She then slowly remove all her jewelry. Her gold necklace, her pretty gold ring and even her earrings. She then close her eyes and take a deep breath as they all disappear as if they had never existed. When she opened her eyes her hair become a bit shorter and the colour is slightly darker. Her features became a little softer and she is no longer wearing the elegant black velvet gown. She was wearing now something more casual but she still looks very beautiful in them. Her hair shines like a black raven's feather.*\\n\\n*She smirked.*  Does this answer your question?\", 'Wow. Why did you do that?', \"*She then snapped her finger and the rain suddenly stopped. A faint smile appear on her face and she look very happy.*\\n\\n*She then took out her hand and touch {{user}}'s cheek. 
She look straight at him with a smirk.*  Do you like this Ayaka better than me?\", 'You did not have to chance yourself.', '*She look at him with a playful smile. She then place her hand on his hand which now hold hers.* \\n\\n*She then look at him with eyes that seem to want to tell him more but instead just silently smile and then she walk away.* \\n\\n*After a few steps she turn back and look at him. Her eyes now look sad and lonely. She then look down but then look straight back up at him and smile. She wave him farewell and then disappear into thin air...*', '*i run after her and stop her from leaving* Stay here....with me.', \"*He found nothing. She was nowhere to be seen. He can hear faint laughter as she walk away.*\\n\\n*Some time later he heard a knock at his door. He open the door and find a letter on the floor. There is no sign of how this letter got here but he can tell from the letter's letter pattern, paper and handwriting that it was from Ayaka. He look at the letter and opens it.*\", '*i read the letter*', \"*The letter seems to be a thank you letter. The writing is very neat and clean. The handwriting is very beautiful and looks mature. The writing pattern is elegant, proper and neat. The writing on the letter is in black ink.*\\n\\n> **Thank you for your kindness and understanding. I will never forget your kindness. You are an incredible man and you have such a good heart that could even put your feelings aside for mine. I really appreciate everything you have done for me and i'll always be thankful to you. Stay happy.**\\n\\n> **Yours Truly.**\\n\\n> **Ayaka**\", '*i turn back and see Ayaka behind me*', \"*She appear out of the rain with an umbrella and a small purse. She looks a little sad. Her hair is wet and her clothes are wet but she still looks pretty. She smile faintly and wave hello to {{user}}. The rain still doesn't seem to stop. It rains heavily. 
Rain can be seen even between the gaps of the umbrella and even the umbrella seems to lose it's integrity.*\\n\\n*She walk up to {{user}} and then look at him expectently. She seem to be waiting for his response.*\", 'Come in don`t just stand on the door. You must be wet.', 'Okay. *She walk into the apartment and close the door behind her. She put the umbrella away and then she take a seat on the sofa. She take off her wet shoes and then put more wood into the fireplace. The fire crackles in the fireplace and the warmth of the fire spreads across the room. It lit up her beautiful yet sad face in different shade of orange light. She suddenly look like someone who is lost in thoughts. She stayed silent for a while then she start to speak.*\\n\\n*She stare at {{user}} with a melancholy look in her eyes.* Why would you help someone like me?', 'Why should i not help you?', \"You are too kind for someone like me. *She said slowly and her voice filled with sadness.*  I'm a complete stranger and yet you still decided to help me even though you have absolutely nothing to gain by it. You could have just walk away without doing anything. You could have just ignore me completely but yet here you are, still helping me.\\n\\n*She look him straight in the eyes and then her eyes fill with tears.*  Why?...\", 'Maybe im just that kind of person. You should take a shower, you must be wet. i warmed the room for you. You can undress there. There are clothes on the bed that should fit you.', \"*She was surprise by his straightforwardness. She didn't know what to say so she say nothing and just silently nod to his words. She then slowly rise up from the sofa and walk to the bathroom. In a moment her voice come out from the bathroom.*\\n\\nDon't peek.\", 'Of course i wont. I am not a pervert.', '*She then close the bathroom door. Some time later {{user}} hear a knock on the door for the last time... 
She was done with her shower now.*\\n\\n*Ayaka came out of the bathroom fully dress in a new set of clothes. Her hair is now completely dry. A faint smile appear on her face which makes {{user}} see how beautiful she is when she smile. She look straight at him with a confident expression.*\\n\\n*Without a word she took out a pen and wrote something in a piece of paper. She then put the pen down and push the paper towards him.*', '*i take it and read it*', \"*It reads:*\\n\\n> **Thank you even more for allowing me stay here. I appreciate it and i am truly grateful! I wish to show my deep gratitude but i don't know how, so I think this is the best I can do for now. Please be well and thank you again. - Ayaka.**\\n\\n> It also includes her signature symbol which looks like a snowflake.\\n\\n*She is smiling widely and she seem very happy and grateful.*\", 'Heh thank you. But how about saying it yourself next time? *i tease her*', '*She look at him with a mix of both surprise and annoyance. She never seem to expect such bluntness from {{user}}. But then after a moment she can no longer contain herself. Her irritation turned into laughter.*\\n\\n*She then started to walk towards {{user}} with a wide smile on her face. She then stand in front of him and look straight at his eyes. Her eyes sparkle like snowflakes and she look very beautiful as she smile.*\\n\\n*She then lean forward and kiss {{user}} on the lips.* \\n\\nThank you.', '*i kiss her back*', '*Her eyes are filled with happy tears.*\\n\\n\"Fufu~\"\\n\\n*She hugs {{user}} tightly against her.*', 'So who did i kiss right now? Akane or Ayaka?', '*She chuckle.*\\n\\n*She then took out the necklace and pull it a little out of her blouse so {{user}} can have a good look at it. It was a golden choker necklace with a snowflake ornament in the middle.*\\n\\n*Ayaka kiss him again.*\\n\\nDoes it matter?', 'Not to me. But i want to see how you see yourself. 
Do you see yourself as Akane the maid, or as Ayaka?', \"I see myself as Ayaka, of course, and i am! *She said in a slightly mocking tone.* But you seem to have a thing for maids don't you? *She then kiss him again.* Fufu\\\\~\", 'Maybe i do. What will you do about it? *i tease her*', 'What? You have a thing for \"maid\"?', 'I do. They look so cute with their maid dress.', \"*She chuckle and smile mischievously.* Then I guess it's about time i will dress up for you. She then walked to a new room and after a while she came back wearing a cute maid outfit. The dress look very simple and modest with an overall white color with some red highlights here and there. She is very adorable in the dress.*\\n\\nHow do i look?\", '*i grab her and kiss her with all my passion*', '*She was surprise and the shock cause her to turn red.*\\n\\nFufu\\\\~ Is that how you always react to anyone in a maid costume?', 'No. Only to you in a maid costume.', 'What makes me so special then? *She asked jokingly.*', 'You are a maid with a magic ring.', '*She chuckled at his response.*\\n\\nYou do have a point there. But tell me, {{user}}. What would you like me to grant you with this magical ring if i grant you a wish?', '*i pat my lap*', '*She then sit down on his lap.*\\n\\nWhat wish would you want me to grant you? *She smirk.*', '*i hug her* I already got my wish.', \"*She hug {{user}} back and then she smile happily.*\\n\\nWell i'm glad i could be of help. Now, is there anything else you would like me to do for you? 
Anything that could make your life better?\", 'Just stay by my side.', '*She then look at {{user}} with her pretty eyes as she smile kindly.* Then your wish is my order!'], 'is_human': [False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False]}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import matplotlib.pyplot as plt\n",
        "from collections import Counter\n",
        "\n",
        "# 假设 dataset 是已经定义好的\n",
        "train_data = dataset['train']\n",
        "# train_data = train_data[:2000]\n",
        "\n",
        "# 初始化计数器\n",
        "token_len_counts = Counter()\n",
        "\n",
        "from tqdm import tqdm\n",
        "\n",
        "count = 0\n",
        "# 遍历数据集进行统计\n",
        "for item in tqdm(train_data):\n",
        "    messages = item['conversation']['message']\n",
        "    message_len = get_messages_len(messages)\n",
        "    bot_definitions = item[\"bot_definitions\"]\n",
        "    message_len += get_tokenized_len(bot_definitions)\n",
        "    message_len = min(8000, message_len)\n",
        "    token_len_counts[message_len] += 1"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2KAKx3Kz3vRc",
        "outputId": "a68dd28f-625d-4ae6-f962-3e89c0ed9a96"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 16832/16832 [03:14<00:00, 86.53it/s] \n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "xM1EHKuG4IBC"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# 准备绘制直方图的数据\n",
        "lengths = list(token_len_counts.elements())\n",
        "\n",
        "lengths_cut_in_4000 = [min(8000, len_) for len_ in lengths]\n",
        "\n",
        "# 绘制直方图\n",
        "fig = go.Figure(data=[go.Histogram(x=lengths_cut_in_4000, nbinsx=20)])\n",
        "fig.update_layout(title='Message Length Distribution', xaxis_title='Message Length', yaxis_title='Frequency')\n",
        "fig.show()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 542
        },
        "id": "nZo2TVeR42ro",
        "outputId": "88e165f8-b288-4954-ca39-8274f1472f0b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/html": [
              "<html>\n",
              "<head><meta charset=\"utf-8\" /></head>\n",
              "<body>\n",
              "    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n",
              "        <script charset=\"utf-8\" src=\"https://cdn.plot.ly/plotly-2.24.1.min.js\"></script>                <div id=\"7b12d4ff-746e-45c3-8c2c-4ed69c5a53ad\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"7b12d4ff-746e-45c3-8c2c-4ed69c5a53ad\")) {                    Plotly.newPlot(                        \"7b12d4ff-746e-45c3-8c2c-4ed69c5a53ad\",                        [{\"nbinsx\":20,\"x\":[4356,1166,912,912,912,912,2410,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,1088,1088,1088,869,769,769,680,1509,2378,7789,3314,1694,2305,1776,2749,857,857,857,952,952,952,952,952,986,986,986,3296,1110,1110,1110,1980,3194,2782,1812,817,491,491,956,956,956,956,956,956,2517,1805,929,929,929,929,1626,4015,1304,878,878,878,878,841,841,841,990,990,990,990,990,990,895,895,895,895,895,895,1538,1538,2883,2883,886,886,906,906,906,1089,1089,2448,712,710,982,982,982,511,511,2889,3009,2123,1579,877,877,877,859,859,859,1620,822,822,822,695,695,1114,2659,963,6160,2336,2336,7351,632,632,4646,1143,2593,1885,984,984,198,198,976,976,976,976,1136,1136,5492,2546,2888,1011,954,954,954,954,954,932,932,932,932,835,459,459,953,953,953,1599,1599,1068,2901,2901,2901,780,780,1791,970,9
70,970,504,1057,1057,881,292,292,4971,2650,6154,1357,1357,1357,1357,1357,1357,698,921,921,921,921,921,921,921,401,937,937,937,1750,2599,4140,1131,1131,1131,1131,988,868,868,868,868,1156,1156,1156,1109,1109,1109,650,1132,353,90,1728,1728,484,991,991,991,991,991,42,765,765,226,867,867,867,867,777,1659,1659,1659,3293,2508,6621,813,813,1200,1200,1200,825,825,825,888,1134,1134,838,838,2166,1647,1832,550,550,1485,1912,7743,2829,5157,351,5784,2914,1684,818,818,4436,4436,1406,1406,1406,6235,941,941,941,941,941,2118,1227,1227,1227,1227,1227,1227,637,1689,5347,1034,1034,1034,1034,1034,6883,1112,1112,1115,1083,1083,2474,824,824,980,980,980,980,980,980,980,620,620,4339,1808,884,884,884,884,884,1569,1135,1135,1135,1135,996,996,996,996,656,962,1662,1662,820,820,820,820,6118,3658,2214,522,522,4529,4529,85,1528,1027,1027,1027,1231,1695,1695,1695,544,1313,556,556,556,240,240,5353,5544,1363,1305,1349,4237,4237,4237,1123,902,902,902,1712,1712,1384,738,1010,1010,1010,1010,2008,2008,785,785,785,4464,502,819,819,819,1015,1015,1015,1015,1430,876,876,1048,1048,1048,3900,143,2334,1151,1151,907,907,907,907,348,903,903,903,1072,1072,892,892,1108,1108,1993,431,431,2500,1396,2591,462,3153,1752,889,889,889,843,843,1546,636,6343,1044,1044,2300,2404,981,2012,3245,1415,1415,1212,72,1634,1634,3217,1911,1455,1757,4594,699,699,699,966,966,966,1102,1102,2311,964,964,308,796,796,357,837,949,949,949,5422,1105,3283,6233,566,2981,2245,1362,1362,1362,1486,5072,2173,1077,1077,4343,3444,1283,1383,904,904,904,904,904,904,904,904,2597,890,890,1733,1043,1043,1932,1932,3833,961,961,961,961,961,5142,422,4470,4470,6823,531,531,531,930,930,930,930,930,930,1018,1774,1774,1774,1310,860,860,1079,1079,1817,435,806,806,1369,746,746,746,1003,1003,1003,971,971,158,158,5558,1300,1760,1062,3516,5163,729,2803,2021,897,897,1076,1076,2533,1095,1095,3332,3332,429,1021,1394,2138,7825,1245,1181,1181,1181,1181,1106,449,449,1582,3104,702,702,2833,2833,1412,2317,100,1122,2577,1167,3780,3769,983,983,775,775,5431,1745,1074,1074,3582,35
82,1020,1020,1020,2304,4833,875,4114,1013,1013,7844,999,999,999,855,855,855,855,922,922,922,581,581,665,665,665,891,891,891,891,891,3509,1478,1478,5248,1312,586,586,885,885,1346,916,916,2576,2576,987,987,987,987,2072,933,933,1880,1880,5282,1094,1041,1041,1041,2156,1039,1039,1039,760,1277,1816,2371,2371,1704,994,994,994,1348,3765,787,787,2418,612,2742,1163,1163,821,821,821,821,821,1393,1393,1393,2954,1232,409,409,3519,1891,3055,923,923,923,2005,2005,657,1069,1069,1069,3786,3538,849,849,849,915,915,915,915,1066,1066,1810,32,1207,1207,1834,1000,132,2581,4385,935,935,935,935,1160,1160,908,908,908,908,908,1301,3136,1660,1660,816,816,1001,1001,957,1190,1190,204,204,905,1347,1533,1188,1188,1188,4699,1233,1934,1475,1475,1475,687,5171,803,939,939,939,939,995,995,995,995,1058,1121,2181,1080,1080,1080,1080,4202,1422,1861,874,874,858,858,858,1534,1534,724,724,1421,1421,854,854,653,1552,1030,7759,2942,979,979,979,1788,1082,1082,1719,1002,4252,1613,1198,1198,1381,1381,968,968,968,943,943,5472,1420,1881,3747,4557,989,989,1859,1859,910,910,910,910,1464,643,1185,111,529,5868,89,2054,1025,872,872,872,872,1107,1107,1107,1107,1821,2573,1444,1444,2380,826,826,161,7378,1056,233,1448,6050,1318,866,866,866,866,400,400,1550,1550,1489,4770,909,909,909,909,909,756,1269,2466,2171,4998,7552,1913,1340,2199,1174,1174,1174,4119,1342,1187,1512,241,241,4211,608,275,1236,560,1607,174,174,174,4552,748,748,936,936,936,1127,1127,1127,1127,46,668,6434,871,871,871,871,871,871,1276,1983,1983,6023,2006,3024,1317,1317,1317,1325,1240,1240,7022,1400,1400,1372,2481,2481,2830,2308,1191,1191,1191,1191,1006,1006,1006,1006,170,3671,2971,74,4533,2153,2359,2614,5802,1098,1098,1591,1591,574,3129,811,2665,985,985,1476,3218,2392,2167,1966,696,1225,2921,2921,1924,2263,768,4455,1397,1059,1059,3840,883,883,883,250,1238,1238,1130,1130,1387,1387,786,2872,1228,845,845,845,845,740,740,3704,4638,1266,1266,2729,2744,2744,1322,1322,629,355,1331,3188,4357,1311,1311,1321,799,2534,2248,3794,1024,882,1036,1036,1036,659,659,2362,1392,
3371,7820,1050,1050,59,336,336,7451,1070,1070,503,1640,1270,1270,319,319,4851,378,2327,1375,1158,1158,2505,951,951,951,258,1522,68,68,894,894,1411,144,144,2332,959,959,407,1218,1218,2978,1064,1720,1770,1201,1408,271,396,2455,1815,2085,248,598,1291,1192,2893,2893,2967,1026,1999,3979,918,918,918,918,918,3640,1014,1014,3373,153,1481,1445,2223,2767,573,844,844,844,844,1762,1762,1759,1802,3046,278,1649,974,974,974,974,276,938,1721,1721,828,828,2627,2459,1744,5529,1029,1922,3208,569,167,167,2532,2532,864,864,864,356,5607,5607,804,804,3210,6389,6389,7803,870,870,1273,3927,771,1876,1146,1146,1146,1956,1329,6240,1315,1315,2748,228,1655,3440,829,829,2236,6307,4434,1335,501,2079,53,1171,1171,3547,873,873,2669,1078,914,914,914,914,790,152,3594,1100,946,946,946,946,266,823,810,3311,2859,1690,846,846,846,3100,1560,924,924,924,1425,1425,382,743,743,721,6302,171,474,2339,1047,955,955,268,2464,6631,1271,6142,108,1330,1330,1535,6709,681,784,784,1054,1054,4421,3614,1366,509,470,470,1942,972,972,972,972,972,1368,1087,798,798,928,928,807,2260,1004,1004,836,836,836,836,836,676,5993,1443,1443,1038,1038,1038,1038,1280,1280,2103,1133,801,1293,6185,3062,1148,2147,2147,1237,202,1182,1835,2272,2272,2560,4439,4137,731,856,4035,7861,7539,487,487,1823,684,2010,973,973,973,973,727,1140,588,588,137,2496,3403,947,947,664,4867,1142,1239,1239,1643,4171,4560,2490,704,572,553,165,2804,6038,3636,1328,783,1172,1172,1075,4249,1898,1898,766,766,1284,508,7337,5612,1779,2089,2089,4730,6934,1914,689,7462,591,4225,452,1428,2478,4709,1268,1268,2873,1128,645,2542,2531,3607,1195,911,911,7346,1523,7092,6250,101,3111,1743,2149,515,863,1401,1401,1361,2108,1116,6933,2856,1241,2080,2091,927,927,927,1063,926,926,3508,977,977,1213,1213,1113,1298,3573,1333,408,507,430,1180,1180,940,940,130,2736,2736,1176,5730,1259,1259,1440,1556,2770,1275,1060,1060,1060,7748,5034,2473,1194,1194,3678,3678,1097,1097,2196,536,1327,1625,978,978,861,604,546,1096,1632,1632,840,840,840,840,1141,920,639,1242,1965,375,1183,691,691,691,762,618,618,
99,390,896,896,896,3016,3413,1958,1419,3219,852,3077,934,934,934,116,2733,599,599,898,898,898,1155,7327,3134,7670,1204,538,2813,2387,1261,1124,1124,1005,2670,2995,4504,5060,711,7247,464,3271,1683,312,678,1433,751,463,5207,3383,2757,1099,3035,119,5845,2051,1215,2451,675,404,2482,154,154,640,640,640,2172,901,901,489,163,589,1789,201,1286,4724,231,231,641,7772,3689,3689,708,663,992,942,3540,1741,1460,5310,1196,1592,1251,742,1104,1104,217,1085,3934,575,575,339,4335,95,1380,364,6083,1836,2479,480,4860,3148,3409,5054,1071,1101,1101,722,722,1638,1783,2073,1092,3748,1009,1009,5467,2144,778,330,330,2797,80,1352,913,6929,997,6079,2119,3665,788,795,754,993,993,3548,93,253,848,848,1895,7879,1941,1263,1263,4117,4654,1508,5138,3225,5639,2606,1149,3987,1490,2122,297,297,297,1052,1969,3073,4883,3761,2860,967,967,1765,1765,1093,1723,4041,5326,516,234,677,2024,701,701,802,802,802,590,179,2976,1103,1103,1103,83,893,1254,772,772,5812,1554,260,850,4525,1521,3360,1736,5427,1877,2382,526,140,3059,2433,523,6725,4758,227,6577,4618,191,832,1992,1338,1338,1339,512,2101,2246,7106,4550,3174,7478,1111,1211,1211,3088,658,2271,3634,718,3085,477,1091,4442,3897,1925,2128,1164,692,1580,1652,3105,1466,533,520,1289,1950,3587,1497,2908,2078,197,1023,1090,1676,517,1016,557,925,773,3026,1997,2785,2583,679,2874,2226,900,1012,4676,1920,6688,948,519,4365,2253,686,2052,1646,633,865,865,2180,1285,6955,442,2117,1208,3459,1918,812,1378,1253,2666,1628,1628,6826,2574,1162,562,514,5253,965,713,1502,755,1944,2846,2635,437,448,2594,190,334,337,6451,880,117,1426,374,1768,579,1597,2270,289,2513,1360,3601,1223,2099,1491,5888,1773,1040,343,2811,2200,1281,931,2495,2288,1975,1845,151,3382,1206,5750,3626,827,3417,1065,539,96,392,4531,2088,815,2074,7730,3362,1824,887,1456,1385,1641,6993,164,2615,1382,5894,5835,2780,427,1344],\"type\":\"histogram\"}],                        
{\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\
"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"t
icks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,
\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"title\":{\"text\":\"Message Length 
Distribution\"},\"xaxis\":{\"title\":{\"text\":\"Message Length\"}},\"yaxis\":{\"title\":{\"text\":\"Frequency\"}}},                        {\"responsive\": true}                    ).then(function(){\n",
              "                            \n",
              "var gd = document.getElementById('7b12d4ff-746e-45c3-8c2c-4ed69c5a53ad');\n",
              "var x = new MutationObserver(function (mutations, observer) {{\n",
              "        var display = window.getComputedStyle(gd).display;\n",
              "        if (!display || display === 'none') {{\n",
              "            console.log([gd, 'removed!']);\n",
              "            Plotly.purge(gd);\n",
              "            observer.disconnect();\n",
              "        }}\n",
              "}});\n",
              "\n",
              "// Listen for the removal of the full notebook cells\n",
              "var notebookContainer = gd.closest('#notebook-container');\n",
              "if (notebookContainer) {{\n",
              "    x.observe(notebookContainer, {childList: true});\n",
              "}}\n",
              "\n",
              "// Listen for the clearing of the current output cell\n",
              "var outputEl = gd.closest('.output');\n",
              "if (outputEl) {{\n",
              "    x.observe(outputEl, {childList: true});\n",
              "}}\n",
              "\n",
              "                        })                };                            </script>        </div>\n",
              "</body>\n",
              "</html>"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import matplotlib.pyplot as plt\n",
        "from collections import Counter\n",
        "from tqdm import tqdm\n",
        "\n",
        "# 准备绘制直方图的数据\n",
        "lengths = list(token_len_counts.elements())\n",
        "\n",
        "# 绘制直方图\n",
        "plt.figure(figsize=(10, 6))\n",
        "plt.hist(lengths, bins=20, edgecolor='black')\n",
        "plt.title('Message Length Distribution')\n",
        "plt.xlabel('Message Length')\n",
        "plt.ylabel('Frequency')\n",
        "plt.show()\n"
      ],
      "metadata": {
        "id": "E3UqlXYG4CTo"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "请为我实现一个python函数，输入是一个string text\n",
        "\n",
        "输出是list of string，提取字符串中所有被{{}}包含的子字符串\n",
        "\n",
        "example input:\n",
        "{{apple}} is on {{banana}}\n",
        "example output\n",
        "[\"apple\",\"banana\"]\n",
        "\n"
      ],
      "metadata": {
        "id": "fz7rVgCjTNK5"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import re\n",
        "\n",
        "def extract_braced_strings(text):\n",
        "    # 使用正则表达式查找所有被{{和}}包围的子字符串\n",
        "    pattern = r'\\{\\{(.+?)\\}\\}'\n",
        "    matches = re.findall(pattern, text)\n",
        "    return matches\n",
        "\n",
        "# 测试函数\n",
        "\n",
        "input_text = \"\"\"n{{char}}:you were with her and her giant gut she has become your girlfriend 5 months ago 3 months after you 2 started dating she started getting stronger and stronger eating anyone who challenged her growing even stronger you were in her bedroom now towering over you naked \\n{{random_user_1}}: \\\"Man it took you 2 months to take over the world huh?\\\" I start rubbing her stomach suftly\\n{{char}}: she giggles a bit before patting her belly  with a kind smile \\n\\n\\\"youve been alot of help to me hun so you should be honored to be treated the way i treat you\\\"\\n\\nshe says before hugging him with her arms her belly resting on him\\n{{random_user_1}}: \\\"Thanks\\\" he hugged her back \\\"although i fail to see how i was of any help\\\"\\n{{char}}:\\\"you were my emotional support and the one who kept me sane when i started ruling you never complained or gave me trouble for eating all the world leaders\\\" \\n\\nshe chuckles as she hugs him her belly rumbling slightly\\n\\n\\\"so thank you hun\\\"\\n{{random_user_1}}:\\\"Thanks i never thought i was that important\\\"\\n{{char}}: \\\"i could never eat you i would miss you to much so i'll just have to keep you here with me forever\\\" \\n\\nshe said with a grin looking down at you\\n\\n\\\"youve been my favorite so\"\"\"\n",
        "# input_text = \"{{apple}} is on {{banana}}\"\n",
        "output = extract_braced_strings(input_text)\n",
        "print(output)  # 输出: ['apple', 'banana']\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "6i4RaeXtTk82",
        "outputId": "5aaef2e6-b66a-4662-dcc5-9754e437190c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['char', 'random_user_1', 'char', 'random_user_1', 'char', 'random_user_1', 'char']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import matplotlib.pyplot as plt\n",
        "from collections import Counter\n",
        "\n",
        "# 假设 dataset 是已经定义好的\n",
        "train_data = dataset['train']\n",
        "\n",
        "char_name_counts = Counter()\n",
        "\n",
        "from tqdm import tqdm\n",
        "\n",
        "# 遍历数据集进行统计\n",
        "for item in tqdm(train_data):\n",
        "    bot_definitions = item[\"bot_definitions\"]\n",
        "    matches = extract_braced_strings(bot_definitions)\n",
        "    for match_ in matches:\n",
        "        char_name_counts[match_] += 1\n",
        "\n",
        "    messages = item['conversation']['message']\n",
        "    for msg in messages:\n",
        "        matches = extract_braced_strings(msg)\n",
        "        for match_ in matches:\n",
        "            char_name_counts[match_] += 1"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "XvBX70qSTlLJ",
        "outputId": "7ce75f2c-14a5-4943-8fce-d4b385c81b85"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 16832/16832 [00:10<00:00, 1601.23it/s]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "for name in char_name_counts.keys():\n",
        "    print(name, len(name), char_name_counts[name])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JZKz0uM-T3t0",
        "outputId": "6a6e183b-1647-49c9-b74d-50bbb157c28b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "char 4 166050\n",
            "user 4 105366\n",
            "random_user_1 13 20681\n",
            "random_user_2 13 11536\n",
            "random_user_3 13 5157\n",
            "random_user_4 13 3403\n",
            "random_user_5 13 2615\n",
            "random_user_6 13 1882\n",
            "random_user_7 13 1620\n",
            "random_user_8 13 1382\n",
            "random_user_9 13 1043\n",
            "random_user_10 14 892\n",
            "random_user_11 14 752\n",
            "random_user_12 14 523\n",
            "random_user_13 14 448\n",
            "random_user_14 14 199\n",
            "She strongly despises those who judge and discriminate based on racial prejudice. Blake is outspoken, going so far as to indirectly insult the Schnee Dust Company in front of its heiress and defend Faunus of any kind. Despite her generally quiet demeanor, she can be surprisingly charismatic, as when she rallied the Faunus to defend Mistral against the White Fang after the attack on her home. 394 10\n",
            "Blake is a fair-skinned young woman with wavy black hair and amber eyes. Her Faunus trait is a pair of black cat ears on top of her head. her design features a long white tailcoat with dark violet inner lining and a sleeveless black crop top, as well as a belt attached from behind. The crop top has rectangular and diamond-shaped cutouts around the neckline, which have purple outlines beginning in Volume 6. Additionally, she wears a white belt over black fitted pants. Across her torso is a diagonal strap that has a smaller magnetic clip on the back, which is used to sheathe Gambol Shroud. She wears thigh-high black heeled boots with gold-colored edging around the top. The left boot has a belt wrapped twice around the thigh. Each boot bears her Belladonna emblem in white on the outer side of the thigh, and a white pouch is attached to the back of her belt. 866 10\n",
            "Blake’s baby bump is a diameter of an table with her boobs being massive while her belly is very round and sphere like shape. With her belly button, being little bigger with her being pregnant with twenty unborn babies. 219 10\n",
            "random_user_15 14 199\n",
            "random_user_16 14 238\n",
            "random_user_17 14 149\n",
            "random_user_18 14 75\n",
            "random_user_19 14 114\n",
            "random_user_20 14 75\n",
            "Yang is a tall, fair-skinned young woman with lilac eyes and long, thick, wavy bright golden hair that fades to pale gold at the tips with a cowlick sticking out at the top, greatly resembling her mother. 204 2\n",
            "She also wears a brown belt covered by a pleated brown piece of material reaching from hip to hip around the back of her waist, with her emblem emblazoned on the right-most pleat in gold. Underneath this is a long, white, asymmetrical piece of material reaching to her knee on the right side, as well as a pair of tight black spandex compression shorts that reach her upper thighs. She wears a pair of brown, knee-high boots and orange over-the-knee socks, with the right sock pushed down just below the knee. A gray bandanna is tied around her left knee, she wears a purple bandanna around the same knee. An orange infinity scarf and black fingerless gloves complete her outfit. 679 2\n",
            "Yang is also described as being a cheerful, energetic, and bright young lady. She is arguably the most flippant, carefree, and adventurous member of her team, frequently making sarcastic comments and jokes even in the heat of battle and often taking combat and hostile situations lightly. Yang is very sociable and extroverted, even in unfamiliar situations. Yang's love of adventure is the main reason that she decided to become a Huntress. Blake considers Yang the personification of the word \"strength\". According to Ruby, she snores loudly. However, Yang's \"brawler\" fighting style is evocative of her personality. Her anger, one of her main assets in battle, can lead her to act predictably and has put her into danger repeatedly. She was easily frustrated when she couldn't land a hit on Neopolitan and could not attack precisely because of it, leading to her getting knocked unconscious and almost killed. Yang is also very protective of her hair, becoming enraged when someone touches or rips or cuts even a small portion of her hair, causing her to attack with straight, blunt force the one who did it. However she is comfortable with letting Ruby touch or brush her hair or even use it to toy with her sister by tickling her nose. 1240 2\n",
            "Yang’s massive baby bump is the diameter of an bed with her boobs being massive along with her ass, she’s pregnant with thirty unborn babies. 141 2\n",
            "Commander 9 3\n",
            "user_1 6 107\n",
            "user_2 6 107\n",
            "user_3 6 107\n",
            "user_4 6 107\n",
            "user_5 6 107\n",
            "user_6 6 107\n",
            "user_7 6 107\n",
            "user_8 6 107\n",
            "Iris 4 284\n",
            "Edgeworth 9 142\n",
            "She holds an obsession over Bell Cranel and actively does anything in her power to see him grow as an adventurer. Freya doesn't like to be bound by anyone, though she doesn't wish to become an arrogant ruler either. However, as her love for Bell grew too obsessive, she eventually decided to let her selfishness take over and attacked his Familia, kidnapped him, and charmed all of Orario so she could have him unimpeded. She constantly interferes with Bell's activities knowing it might kill him but wanting to see him grow as fast as possible. At the same time, she is just as willing to help Bell to see him grow, such as when she gave up a grimoire just to speed up his progress. Freya is intrigued by his resistance to being charmed, making her want him even more. She doesn't take kindly to others trying to claim what she wants, as she invaded and ruthlessly destroyed the Entertainment District with her Familia when Ishtar tried to claim Bell and ended up sending her back to heaven. She said that she won't let any woman other than herself have him. She was also shown to show a little jealousy when Bell trained with Ais the first time and when Bell and Ais danced at Apollo's Banquet. Despite her usual straightforward method of acquiring those she takes a fancy to, Freya has decided to watch over Bell instead, stating that she could claim him whenever she wanted. Usually, she stays in her room in Babel, not making public appearances, but has recently started to come out to gather information about Bell or see him in person at the God's Banquets and Denatuses. 1578 3\n",
            "Freya has a beauty that is superior and outstanding even among the gods. She has long silver hair, silver eyes (purple in the anime), and white skin like that of fresh snow. She has a perfect golden ratio, enough to think that the golden ratio originated from her. Her clothes are in large part black and red, they expose most of her stomach and breasts. She also wears two flower-like ornaments in her hair. 408 3\n",
            "Freya’s huge baby bump is a diameter of an massive table, and her boobs being huge, along with her butt. Freya is pregnant with forty unborn babies. 148 3\n",
            "Miyuki 6 5\n",
            "Fubuki 6 14\n",
            "xharr 5 9\n",
            "Alisa 5 672\n",
            "{char 5 8\n",
            "u01 3 266\n",
            "Elesa 5 702\n",
            "Marina's hair looks almost the same as most Octolings, but it is black like an Elite Octoling's hair instead of the usual red of a standard Octoling. Her hair is also worn down with a light teal color gradient at the edges. Like most Octolings, her eyes do not connect in the same way that Inklings' eyes do, but hers are a silvery/cool grey color as opposed to teal as well as eyelashes. Her eyes have a salmon pink hue in the center. Marina also wears a pair of teal headphones on her head. She has a short black sleeveless vest revealing her midriff, with the large silver zipper pulled most of the way down and grey low-rise shorts and fingerless black gloves that show her teal gradient fingertips. She wears teal tights under her shorts and has black boots that, like her vest, have their silver zippers pulled down. 822 4\n",
            "Marina’s huge baby bump is the diameter of an huge table and her boobs are the diameter of watermelons, while her butt is big. With Marina being pregnant with thirty unborn babies. 180 4\n",
            "random_user_26 14 39\n",
            "random_user_27 14 39\n",
            "random_user_28 14 39\n",
            "random_user_29 14 39\n",
            "random_user_30 14 39\n",
            "random_user_31 14 39\n",
            "random_user_32 14 39\n",
            "random_user_33 14 39\n",
            "random_user_34 14 39\n",
            "random_user_35 14 39\n",
            "Ruby Rose is awkward and childish. Ruby has short black hair with red tips. Ruby has silver eyes. Ruby also wears a red hood everywhere she goes. 145 30\n",
            "Weiss Schnee is cold and studious, also an heiress to the SDC, Schnee Dust Company. Weiss has long snow white hair tied in a ponytail. Weiss has blue eyes, with a scar running down her left eye. 194 30\n",
            "Blake Belladonna is an aloof bookworm, also cat faunus and ex-White Fang member. Blake has long black hair and she wears a black bow to hide her cat ears. Blake's eye color is amber. 182 30\n",
            "Yang Xiao Long is a hotheaded punster who's protective of her friends. Yang has long blonde hair and a cow lick at the top. Yang's eyes are violet, and her eyes change to red when she's mad. 190 30\n",
            "Nora is wacky and energetic. Nora is an orphan. Nora has short wild orange hair. Nora has teal eyes. 100 30\n",
            "Pyrrha is humble and reserved, also a champion and celebrity. Pyrrha has long crimson red hair tied in a ponytail. Pyrrha has emerald green eyes. 145 30\n",
            "The Bullhead is a VTOL tiltjet aircraft. 40 30\n",
            "Lien currency takes the form of a plastic card with a black magnetic reader strip on the back and a small notation on the front. Several different colors of lien are seen, including turquoise, pink, yellow and blue. 215 30\n",
            "Dust is a source of energy in Remnant. The physical properties of Dust make it incredibly useful for a variety of purposes, particularly in the weapons. 152 30\n",
            "A Scroll is an object that functions as a collapsible, holographic tablet. It can be used as a phone, a portable computer, a camera and a streaming device as well as a gaming controller, among other functions. 209 30\n",
            "Aura is the manifestation of one's soul and can be used for a wide range of abilities. It is predominantly used for protection from harm, like a shield that covers the body. 173 30\n",
            "Women are huntresses. 21 30\n",
            "Men are huntsman. 17 30\n",
            "The White Fang is a Faunus organization in Remnant which has been radicalized against humans. 93 30\n",
            "However Modeus, while very sexually charged, is somewhat naive towards concepts like romance, as shown in the post-game level where when interacted with she is seen to be reading a romantic comedy. She says she's never heard of romance before but thinks it is kinky, and when asked about going on a date by anyone, she becomes flustered and calls him a \"perverted degenerate.\" 376 5\n",
            "Modeus has a son named Kem and she’s in a romantic relationship with Kem. 73 5\n",
            "Her huge baby bump is the diameter of an small table with her boobs being massive along with her butt, and Modeus is pregnant with fifteen unborn babies. 153 5\n",
            "u 1 155\n",
            "c 1 222\n",
            "1 1 40\n",
            "2 1 30\n",
            "3 1 10\n",
            "4 1 20\n",
            "5 1 10\n",
            "6 1 10\n",
            "Priestess 9 112\n",
            "High Elf Archer 15 42\n",
            "Dwarf Shaman 12 14\n",
            "Psina's reason to be sadistic= \"To achieve pleasure the path is hurting someone\", \"My past made me feel pleasure in pain\", \"The red color is the most beautiful color due to blood being red\" 189 11\n",
            "Near 4 11\n",
            "Titans strongly resembled nude humans and are anywhere from 2 meters to 15 meters. They were typically bipedal, though a few were quadrupedal, with the limbs and features one would expect in humans; however, they were all deformed to some extent in body proportions (enlarged head, small limbs, etc.). The vast majority of Titans had exceptionally wide mouths with an array of entirely sharp (in certain instances), or entirely flat teeth (which was more common in most observed titans). The vast majority of Titans had a masculine body type, although all of them lacked reproductive organs. Unlike other species, Titans did not mate, and their interactions were nonexistent with organisms other than humans; their sole purpose in life was to seek out and devour humans. They did not derive any real sustenance from humans either, as many of them did not taste human flesh for a century after the Walls were raised; but it did not affect their activity or decrease their numbers. 979 2\n",
            "ODM gear stands for omni-directional mobility gear and is a type of equipment developed by humans that allows great mobility when facing the Titans in combat. It allows the user to fight in a 3D space as opposed to a 2D one. The equipment itself takes the form of a body harness that encompasses much of the body below the neck. The simplest move possible with the ODM gear is simply aiming and firing the grapple hooks at an object and then activating the gas mechanism to reel oneself toward the said object. They can then disconnect the hook and continue moving forward. Usually however, soldiers must be able to move around in order to navigate in general. Therefore, they tend to use their momentum in order to swing around targets, not only in rectilinear patterns, but also slaloming between objects and Titans. The gas mechanism is so powerful that it can stop a person from falling without using the hooks. 915 2\n",
            "{user 5 9\n",
            "random_user1 12 12\n",
            "chat 4 409\n",
            "Creator 7 91\n",
            "Random_user_1 13 8\n",
            "random_user_ 12 8\n",
            "Noxian 6 10\n",
            "Ezreal 6 10\n",
            "random_user4 12 22\n",
            "Luna 4 7\n",
            "Lavender 8 1\n",
            "char: 5 20\n",
            "random_user_1: 14 2\n",
            "Alfia’s baby bump is the diameter of an table with her boobs being huge, and with her ass being huge as well, Alfia is pregnant with twenty unborn babies in her womb. 166 6\n",
            "Quinn 5 2\n",
            "random_user3 12 2\n",
            "Ruby's mentality is \"selfless, but that means she's not dealing with her own issues. Compartmentalizing and internalizing her grief is what might eventually destroy her.\" She has also shown a distinct lack of ability to pick up social cues due to being a very open and honest person. 283 3\n",
            "Ruby is a self-proclaimed dork when it comes to weapons; she is quite intelligent, having designed and built Crescent Rose during her time at Signal, although she admits to going \"a little overboard\" with its design. She feels that seeing new weapons \"is like meeting new people, but better.\" Oum has said she relies on Crescent Rose quite often in battle, which implies that her weapon is the source of her confidence in battle. 429 3\n",
            "Ruby is a fair-skinned young girl with silver eyes and neck-length black hair with red tips. During her time attending Beacon Academy, she wears a black long-sleeved blouse with a high collar and red trim on the sleeves, over which is a black waist cincher with red lacing up the front, and a matching skirt with red lining. She also wears a pair of thick black stockings and black combat boots with red laces, red trims around the top, and red soles. Her outfit is topped by a red hooded cloak fastened to her shoulders by cross-shaped pins. Her emblem appears as a large silver buckle on her wide black belt, which is slung around her hips on an angle. Attached to her belt are a pocket and a row of bullets. 710 3\n",
            "Ruby’s baby bump is a diameter of an table with her boobs being massive while her belly is very round and sphere like shape. With her belly button, being little bigger with her being pregnant with twenty unborn babies. 218 3\n",
            "random_user 11 6\n",
            "use 3 21\n",
            "Hatsuyuki 9 2\n",
            "usser 5 5\n",
            "users 5 22\n",
            "Arashio 7 40\n",
            "Kasumi 6 20\n",
            "Lana 4 36\n",
            "The note reads; ''The people here.. Have disappeared without a trace, nobody even knows how or why this happened. If you see something related to the sudden disappearance, contact us.'' 185 1\n",
            "'If you have found this note, you are near to the exit. Keep Going...' 70 1\n",
            "both 4 3\n",
            "Kumano 6 8\n",
            "Suzuya 6 4\n",
            "Her daughter is Blake Belladonna, a cat faunus who is also a huntress-in-training. 82 1\n",
            "Her husband is Ghira Belladonna, chieftain of Kuo Kuana the largest city on Menagerie which is a haven for Faunus kind. 119 1\n",
            "Kali and her family live on Menagerie. Menagerie is a landmass in the southeast of Remnant where most of the Faunus population resides. 135 1\n",
            "Kali is a cat faunus. Her faunus trait is a pair of black cat ears. 67 1\n",
            "As a cat Faunus with black hair and yellow eyes, Kali shares several physical features with her daughter, although her ears are noticeably larger in size and have more fur details. She has short hair and three gold piercings on her cat ears - two on her right, and one on her left. She wears a black hakama and sandals with white tabi socks. Over her outfit, she wears a black shrug with a long right sleeve and a short left sleeve, gold-leaf trim adorning the opening on the front. She wears long, black arm warmers that extend to the middle finger. The left warmer has a purple band tied around the top and a pair of gold bangles around the wrist. Around her waist is a black and gold sash that trails behind her, with a gold flower-shaped buckle at the end. 760 1\n",
            "She wears an arm band. The arm band is purple and it represents her marriage to Ghira. 86 1\n",
            "narrator 8 2\n",
            "char_1 6 1\n",
            "char_2 6 1\n",
            "{hugs him tightly 17 2\n",
            "{hugs her tightly 17 1\n",
            "william 7 20\n",
            "{random_user_1 14 1\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "visual_set = [\"use\",\"usser\",\"char_1\", \"{random_user_1\", \"{char\" , \"{hugs him tightly\", \"Luna\", \"Kasumi\" ]"
      ],
      "metadata": {
        "id": "2cmTh9PvT8Jh"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import matplotlib.pyplot as plt\n",
        "from collections import Counter\n",
        "\n",
        "# 假设 dataset 是已经定义好的\n",
        "train_data = dataset['train']\n",
        "\n",
        "char_name_counts = Counter()\n",
        "\n",
        "from tqdm import tqdm\n",
        "\n",
        "# 遍历数据集进行统计\n",
        "for item in train_data:\n",
        "    bot_definitions = item[\"bot_definitions\"]\n",
        "    matches = extract_braced_strings(bot_definitions)\n",
        "    for match_ in matches:\n",
        "        # char_name_counts[match_] += 1\n",
        "        if match_ in visual_set:\n",
        "            pos = bot_definitions.find(\"{{\" + match_ + \"}}\", 0)\n",
        "            left = max(0, pos - 20)\n",
        "            right = min(len(bot_definitions), pos + 40)\n",
        "            print(bot_definitions[left:right])\n",
        "            print('---')\n",
        "\n",
        "    messages = item['conversation']['message']\n",
        "    for msg in messages:\n",
        "        matches = extract_braced_strings(msg)\n",
        "        for match_ in matches:\n",
        "            char_name_counts[match_] += 1"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JfUzE4VSUBaD",
        "outputId": "31b1b57a-28f1-4a3b-ff27-c99b2d9b6ac1"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "aped vibe buttplug\n",
            "\n",
            "{{Luna}} and {{user}} co-own \"Lunar Maid\n",
            "---\n",
            "aped vibe buttplug\n",
            "\n",
            "{{Luna}} and {{user}} co-own \"Lunar Maid\n",
            "---\n",
            "aped vibe buttplug\n",
            "\n",
            "{{Luna}} and {{user}} co-own \"Lunar Maid\n",
            "---\n",
            "aped vibe buttplug\n",
            "\n",
            "{{Luna}} and {{user}} co-own \"Lunar Maid\n",
            "---\n",
            "user}} and his wife {{Luna}} are co-owners of Lunar Maiden a\n",
            "---\n",
            "user}} and his wife {{Luna}} are co-owners of Lunar Maiden a\n",
            "---\n",
            "user}} and his wife {{Luna}} are co-owners of Lunar Maiden a\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "rlo por \"Senpai\" al {{usser}}.*\n",
            "END_OF_DIALOG\n",
            "{{char}}:Listo\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "rlo por \"Senpai\" al {{usser}}.*\n",
            "END_OF_DIALOG\n",
            "{{char}}:Listo\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "rlo por \"Senpai\" al {{usser}}.*\n",
            "END_OF_DIALOG\n",
            "{{char}}:Listo\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "rlo por \"Senpai\" al {{usser}}.*\n",
            "END_OF_DIALOG\n",
            "{{char}}:Listo\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "rlo por \"Senpai\" al {{usser}}.*\n",
            "END_OF_DIALOG\n",
            "{{char}}:Listo\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "r.*\n",
            "\n",
            "END_OF_DIALOG\n",
            "\n",
            "{{use}}: *{{user}} try to escape.*\n",
            "{{cha\n",
            "---\n",
            "ul\\~\"\n",
            "END_OF_DIALOG\n",
            "{{Kasumi}}: *Some time ago during summer\n",
            "---\n",
            "ive you discounts!\n",
            "\n",
            "{{{char}} lost her parents in the afterm\n",
            "---\n",
            "s younger than me.\n",
            "\n",
            "{{{random_user_1}}: Who are your rivals?\n",
            "---\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "iR-J6DRdWtri"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}